
clear
set more off

// NOTE(review): hard-coded, machine-specific project root. Every relative path
// used below is resolved against this directory.
cd "D:\data_replication"

// Import and format s20 and s80 data
//==============================================================================

import delimited estimation\4_demand_estimation\4_cobb_douglas_weights\share_summary.csv

// Clean the predicted shares
//------------------------------------------------------------------------------
// For each predicted-share column (s_20, s_80):
//   1. floor negative predictions at 0
//   2. overwrite the pc8 == 0 rows with s_true
// Change counts recorded by the original author for step 1:
//   s_20: 16,725 / 591,326 changes        s_80: 8,308 / 591,326 changes

foreach share_var of varlist s_20 s_80 {
	replace `share_var' = 0 if `share_var' < 0
	replace `share_var' = s_true if pc8 == 0
}


// Normalize sum of shares to 1
//------------------------------------------------------------------------------

// Sum each predicted share within every year-quarter-declarant cell, then divide
// through so the shares of each cell add up to 1.
// egen's total() is the current name of the older sum() function and returns the
// same group total (missing values are treated as 0).
// A cell whose total is 0 ends up with missing shares, because Stata evaluates
// division by zero as missing.
bysort year quarter declarant: egen s20_sum = total(s_20)
by year quarter declarant: egen s80_sum = total(s_80)

replace s_20 = s_20 / s20_sum
replace s_80 = s_80 / s80_sum


// Format and save
//------------------------------------------------------------------------------

// Reduce the data to the cell identifiers, the product code and the two
// normalized share columns; put the identifiers first and order the rows by
// cell and product code.
keep year quarter declarant pc8 s_20 s_80
order year quarter declarant pc8
sort year quarter declarant pc8

// Write the result in Stata and CSV form, overwriting any existing files.
save estimation\4_demand_estimation\4_cobb_douglas_weights\weights_pc8.dta, replace
export delimited using estimation\4_demand_estimation\4_cobb_douglas_weights\weights_pc8.csv, replace


// pc8plus - disaggregated
//==============================================================================

// Get shares of synthetic categories
//------------------------------------------------------------------------------

// Load Y_summary.csv, replacing the data currently in memory.
import delimited estimation\4_demand_estimation\4_cobb_douglas_weights\Y_summary.csv, clear

// Only synthetic: keep the codes that are shorter than 8 characters.
drop if strlen(pc8plus) >= 8

// Store the code as a numeric variable named pc8 so these rows line up with the
// pc8-level weights, and keep just the identifiers and the share.
rename pc8plus pc8
destring pc8, replace
keep pc8 year quarter declarant share


// Combine with previous data, adjust synthetic shares to be consistent with total
//------------------------------------------------------------------------------

// Append the pc8-level weights saved earlier in this script. from_pc8_file
// records where each row came from:
//   0 = synthetic-category row that was already in memory (has share, s_20 and
//       s_80 are missing until filled in below)
//   1 = row read from weights_pc8.dta (has s_20 / s_80, share is missing)
// Identifying synthetic rows by origin instead of by the number of digits in the
// code keeps the rescaling consistent with share_pc8plus, which sums share over
// every synthetic row, and cannot touch an appended pc8 row whose numeric code
// happens to be short (e.g. after losing a leading zero).
append using estimation\4_demand_estimation\4_cobb_douglas_weights\weights_pc8.dta, generate(from_pc8_file)

sort year quarter declarant pc8

// Within each year-quarter-declarant cell:
//   share_pc8plus = total of share over the synthetic rows
//   s_20_0/s_80_0 = the weight carried by the cell's pc8 == 0 row
//                   (missing when the cell has no such row)
by year quarter declarant: egen share_pc8plus = total(share)
by year quarter declarant: egen s_20_0 = mean(cond(pc8 == 0, s_20, .))
by year quarter declarant: egen s_80_0 = mean(cond(pc8 == 0, s_80, .))

// Split the pc8 == 0 weight across the synthetic rows in proportion to share,
// so the synthetic weights of a cell add up to the pc8 == 0 weight.
replace s_20 = share * s_20_0 / share_pc8plus if from_pc8_file == 0
replace s_80 = share * s_80_0 / share_pc8plus if from_pc8_file == 0

// The pc8 == 0 weight has been redistributed above; remove those rows.
drop if pc8 == 0


// Format and save
//------------------------------------------------------------------------------

// Reduce the data to the cell identifiers, the product code and the two share
// columns; put the identifiers first and order the rows by cell and product
// code.
keep year quarter declarant pc8 s_20 s_80
order year quarter declarant pc8
sort year quarter declarant pc8

// Write the result in Stata and CSV form. These are the same file names used for
// the pc8-only weights saved earlier in this script, so those files are
// overwritten here.
// NOTE(review): the section is titled "pc8plus" but the output keeps the
// weights_pc8 name - confirm this is what downstream code expects.
save estimation\4_demand_estimation\4_cobb_douglas_weights\weights_pc8.dta, replace
export delimited using estimation\4_demand_estimation\4_cobb_douglas_weights\weights_pc8.csv, replace


